library(here)
# /*===== R =====*/
source(here("GitControlled/Codes/0_1_ls_packages.R"))
source(here("GitControlled/Codes/0_0_functions.R"))
# /*===== python =====*/
# --- Python module --- #
source_python(here("GitControlled/Codes/0_0_import_modules.py"))
# --- CF-DML --- #
source_python(here("GitControlled/Codes/run_cf_dml.py"))
# --- DR-ORF --- #
source_python(here("GitControlled/Codes/run_dr_orf.py"))
# --- DML-ORF --- #
source_python(here("GitControlled/Codes/run_dml_orf.py"))
# /*===== Data (allocation period 3, 2008-2012 )=====*/
# --- panel data--- #
reg_data <- readRDS(here("Shared/Data/WaterAnalysis/comp_reg_dt.rds")) %>%
.[year %in% 2008:2012 & usage <= 40,]
# library(caret)
# set.seed(3456)
# trainIndex <- createDataPartition(reg_data$treat2, p = .5,
# list = FALSE,
# times = 1)
# sample1 <- reg_data[trainIndex,]
# sample1[,.N, by="treat2"]
# /*===========================================*/
#'= Function for Data Preparation=
# /*===========================================*/
prep_data <-
function(data, het_vars){
# data = sample1; het_vars = c("pr_in", "gdd_in","pet_in")
# data = sample1; het_vars = "pr_in"
# /*===== variables =====*/
cov_ls <- c(
# --- weather --- #
"pr_in", "gdd_in","pet_in",
# --- soil --- #
"silttotal_r", "claytotal_r", "slope_r", "ksat_r", "awc_r"
)
#/*--------------------------------*/
#' ## Training data
#/*--------------------------------*/
# --- dependent variable --- #
Y <- data[, usage] %>% as.array() %>% unname()
# --- treatment indicator --- #
T <- data[, treat2] %>% as.array() %>% unname()
# --- heterogeneous impact dirivers --- #
X <- data[, ..het_vars] %>% as.matrix() %>% unname()
# --- confounders/controls --- #
W <- data[, ..cov_ls] %>% as.matrix() %>% unname()
#/*--------------------------------*/
#' ## Testing data
#/*--------------------------------*/
X_test <-
lapply(
het_vars,
function(x) gen_pred_data(data, het_vars, target_var = x)
) %>%
bind_rows()
# .[,!"target_var"] %>%
# as.matrix() %>%
# unname()
return(
list(Y = Y, X = X, T = T, W = W, X_test = X_test)
)
}
# /*===========================================*/
#'= Function to run EconML =
# /*===========================================*/
#/*--------------------------------*/
#' ## DR-ORF
#/*--------------------------------*/
run_DR_orf_comp <- function(data, het_vars) {
# data=sample1; het_vars= "pet_in"
# data=sample1; het_vars= c("pr_in", "gdd_in","pet_in")
dt <- prep_data(data, het_vars)
Y <- dt$Y
T <- dt$T
X <- dt$X
W <- dt$W
X_test <-
dt$X_test %>%
.[,!"target_var"] %>%
as.matrix() %>%
unname()
res_dr_orf <-
run_dr_orf(
Y = Y,
T = T,
X = X,
W = W,
X_test = X_test
)
temp <-
dt$X_test %>%
melt(id.var='target_var') %>%
.[target_var==variable,]
res_X_test_drorf <-
data.table(
x = temp[, value],
pred_te = res_dr_orf[[1]] %>% c(),
te_lower = res_dr_orf[[2]] %>% c(),
te_upper = res_dr_orf[[3]] %>% c(),
variable = dt$X_test[, target_var]
)
# ggplot(res_X_test_drorf) +
# geom_line(aes(x=x, y=pred_te)) +
# # geom_smooth(aes(x=x, y=pred_te), se = FALSE) +
# geom_ribbon(aes(x=x, ymin=te_lower, ymax=te_upper), alpha=0.4) +
# facet_grid(~variable, scale = "free")
return(res_X_test_drorf)
}
#/*--------------------------------*/
#' ## DML-ORF
#/*--------------------------------*/
run_DML_orf_comp <- function(data, het_vars) {
# data=sample1; het_vars= "pet_in"
# data=sample1; het_vars= c("pr_in", "gdd_in","pet_in")
dt <- prep_data(data, het_vars)
Y <- dt$Y
T <- dt$T
X <- dt$X
W <- dt$W
X_test <-
dt$X_test %>%
.[,!"target_var"] %>%
as.matrix() %>%
unname()
#/*--------------------------------*/
#' ## Regression Analysis
#/*--------------------------------*/
res_dml_orf <-
run_dml_orf(
Y = Y,
T = T,
X = X,
W = W,
X_test = X_test
)
temp <-
dt$X_test %>%
melt(id.var='target_var') %>%
.[target_var==variable,]
res_X_test_dmlorf <-
data.table(
x = temp[, value],
pred_te = res_dml_orf[[1]] %>% c(),
te_lower = res_dml_orf[[2]] %>% c(),
te_upper = res_dml_orf[[3]] %>% c(),
variable = dt$X_test[, target_var]
)
# ggplot(res_X_test_dmlorf) +
# geom_line(aes(x=x, y=pred_te)) +
# # geom_smooth(aes(x=x, y=pred_te), se = FALSE) +
# geom_ribbon(aes(x=x, ymin=te_lower, ymax=te_upper), alpha=0.4)
return(res_X_test_dmlorf)
}
n_trees=1000min_leaf_size=10max_depth = 50subsample_ratio = 0.7propensity_model = LogisticRegression(C=1, penalty='l1', solver='saga', multi_class='auto')model_Y=LassoCV(cv=3)propensity_model_final=LogisticRegressionCV(cv=3, penalty='l1', solver='saga', multi_class='auto')model_Y_final=WeightedLassoCV(cv=3)n_trees=1000min_leaf_size=10max_depth=50subsample_ratio = 0.7model_T=LogisticRegression(C=1, solver='saga', multi_class='auto')model_Y=Lasso(alpha=0.01)model_T_final=LogisticRegressionCV(cv=3, solver='saga', multi_class='auto')model_Y_final=WeightedLassoCV(cv=3)discrete_treatment=TrueFor DML-ORF, if discrete_treatment=True (if treatment is
is binary), model_T (The estimator for fitting the
treatment to the features) is treated as a classifier that gives the
probabilities for the treatment (such as LogisticRegression,
RandomForestClassifier, GradientBoostingClassifier).
# === Run DR-ORF === #
res_DR_orf_joint <-
run_DR_orf_comp(data=reg_data, het_vars = c("pr_in", "gdd_in","pet_in"))
saveRDS(res_DR_orf_joint, here("Shared/Results/resRegression/res_DR_orf_joint.rds"))
ggplot(res_DR_orf_joint) +
geom_line(aes(x=x, y=pred_te)) +
geom_ribbon(aes(x=x, ymin=te_lower, ymax=te_upper), alpha=0.4) +
facet_grid(~variable, scale = "free")
# === Run DML-ORF === #
res_DML_orf_joint <-
run_DML_orf_comp(data=reg_data, het_vars = c("pr_in", "gdd_in","pet_in"))
saveRDS(res_DML_orf_joint, here("Shared/Results/resRegression/res_DML_orf_joint.rds"))
ggplot(res_DML_orf_joint) +
geom_line(aes(x=x, y=pred_te)) +
geom_ribbon(aes(x=x, ymin=te_lower, ymax=te_upper), alpha=0.4) +
facet_grid(~variable, scale = "free")
res_DR_orf_joint <-
here("Shared/Results/resRegression/res_DR_orf_joint.rds") %>%
readRDS()
res_DML_orf_joint <-
here("Shared/Results/resRegression/res_DML_orf_joint.rds") %>%
readRDS()
res_joint <-
bind_rows(res_DR_orf_joint, res_DML_orf_joint, .id = "type") %>%
.[,type:=ifelse(type==1, "DR-ORF", "DML-ORF")]
ggplot(res_joint) +
geom_line(aes(x=x, y=pred_te)) +
geom_ribbon(aes(x=x, ymin=te_lower, ymax=te_upper), alpha=0.4) +
facet_grid(type~variable, scale = "free_x")+
labs(title = "Estimate the Impact of pet_in, gdd_in, and pr_in, separately") +
ylab("Treatment Effect (inches)")
# === Run DR-ORF === #
res_DR_orf_sep <-
lapply(
c("pr_in", "gdd_in","pet_in"),
function(x) run_DR_orf_comp(data=reg_data, het_vars = x)) %>%
bind_rows()
saveRDS(res_DR_orf_sep, here("Shared/Results/resRegression/res_DR_orf_sep.rds"))
# ggplot(res_DR_orf_sep) +
# geom_line(aes(x=x, y=pred_te)) +
# geom_ribbon(aes(x=x, ymin=te_lower, ymax=te_upper), alpha=0.4) +
# facet_grid(~variable, scale = "free")
# === Run DML-ORF === #
res_DML_orf_sep <-
lapply(c("pr_in", "gdd_in","pet_in"),
function(x) run_DML_orf_comp(data=reg_data, het_vars = x)) %>%
bind_rows()
saveRDS(res_DML_orf_sep, here("Shared/Results/resRegression/res_DML_orf_sep.rds"))
# ggplot(res_DML_orf_sep) +
# geom_line(aes(x=x, y=pred_te)) +
# geom_ribbon(aes(x=x, ymin=te_lower, ymax=te_upper), alpha=0.4) +
# facet_grid(~variable, scale = "free")
res_DR_orf_sep <-
here("Shared/Results/resRegression/res_DR_orf_sep.rds") %>%
readRDS()
res_DML_orf_sep <-
here("Shared/Results/resRegression/res_DML_orf_sep.rds") %>%
readRDS()
res_sep <-
bind_rows(res_DR_orf_sep, res_DML_orf_sep, .id = "type") %>%
.[,type:=ifelse(type==1, "DR-ORF", "DML-ORF")]
ggplot(res_sep) +
geom_line(aes(x=x, y=pred_te)) +
geom_ribbon(aes(x=x, ymin=te_lower, ymax=te_upper), alpha=0.4) +
facet_grid(type~variable, scale = "free_x")+
labs(title = "Estimate the Impact of pet_in, gdd_in, and pr_in, separately") +
ylab("Treatment Effect (inches)")
res_cf_DML_sep <- readRDS(here("Shared/Results/resRegression/res_cf_DML_sep.rds"))
ggplot(res_cf_DML_sep) +
geom_line(aes(x=x, y=pred_te)) +
geom_ribbon(aes(x=x, ymin=te_lower, ymax=te_upper), alpha=0.4) +
facet_grid(~variable, scale = "free")